# analysis_section_4_3_part1.R
# This script replicates the analysis in Section 4.3 (Part 1) of the paper
#   Figure 6, Figure 7, Figure SI.12, Figure SI.13, Figure SI.14, and Figure SI.15
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata

# Load libraries:
library(dplyr)
library(ggplot2)
library(patchwork)
library(purrr)
library(rdrobust)

# Load data:
load("./Data/VR_VoteCal_LA.Rdata")

# Prepare data for analysis
Voters_NPAV_within_10km <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Non-Perm. VBM", abs_dist <= 10000) %>%
  select(Vote_by_Mail.2020, Turnout.2020, Dist, dist_from_boundary, PoliticalParty)

#*******************************************************************************
# Geographic Boundary-Based RD Analysis
#   Results Reported in Figure 6
#*******************************************************************************

# Percent Voting by Mail
result_NPAV_VBM <- rdrobust(
  y = Voters_NPAV_within_10km$Vote_by_Mail.2020[Voters_NPAV_within_10km$Turnout.2020 == 1]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$Turnout.2020 == 1],
  cluster = Voters_NPAV_within_10km$Dist[Voters_NPAV_within_10km$Turnout.2020 == 1]
)

# Voter Turnout
result_NPAV <- rdrobust(
  y = Voters_NPAV_within_10km$Turnout.2020*100,
  x = Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

#*******************************************************************************
# Figure 6
#   Effects on Percent Voting by Mail and Voter Turnout, RD Estimates
#*******************************************************************************

# Top Panel (Percent Voting by Mail)
plot_NPAV_VBM <- rdplot(
  y = Voters_NPAV_within_10km$Vote_by_Mail.2020[Voters_NPAV_within_10km$Turnout.2020 == 1]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$Turnout.2020 == 1],
  nbin = 1000,
  title = "Estimated Effect: 23.5, Clustered Standard Error: 1.3",
  x.label = "Distance from Boundary (meters)",
  y.label = "% VBM (percentage points)",
  y.lim = c(0, 80),
  col.dots = "skyblue"
)

# Bottom Panel (Voter Turnout)
plot_NPAV <- rdplot(
  y = Voters_NPAV_within_10km$Turnout.2020*100,
  x = Voters_NPAV_within_10km$dist_from_boundary,
  nbin = 1000,
  title = "Estimated Effect: 3.7, Clustered Standard Error: 0.7",
  x.label = "Distance from Boundary (meters)",
  y.label = "Turnout (percentage points)",
  y.lim = c(20, 60),
  col.dots = "skyblue"
)

# Full Figure
Figure_NPAV_main <- plot_NPAV_VBM$rdplot / plot_NPAV$rdplot

# Save Figure
ggsave("./Replication Package/Figure 6.pdf", Figure_NPAV_main,
       width = 8, height = 8, units = "in", dpi = 600)

#*******************************************************************************
# Geographic Boundary-Based RD Analysis by Party
#   Results Reported in Figure 7
#*******************************************************************************

# Registred Democrats
result_NPAV_Dem <- rdrobust(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "Democratic"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "Democratic"],
  cluster = Voters_NPAV_within_10km$Dist[Voters_NPAV_within_10km$PoliticalParty == "Democratic"]
)

# Registred Republicans
result_NPAV_Rep <- rdrobust(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "Republican"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "Republican"],
  cluster = Voters_NPAV_within_10km$Dist[Voters_NPAV_within_10km$PoliticalParty == "Republican"]
)

# Registered NPA Voters
result_NPAV_NPA <- rdrobust(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "No Party Preference"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "No Party Preference"],
  cluster = Voters_NPAV_within_10km$Dist[Voters_NPAV_within_10km$PoliticalParty == "No Party Preference"]
)

#*******************************************************************************
# Figure 7
#   Effects on Voter Turnout by Party Registration, RD Estimates
#*******************************************************************************

# Registered Democrats
plot_NPAV_Dem <- rdplot(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "Democratic"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "Democratic"],
  nbin = 1000,
  title = "Estimated Effect: 5.2, Clustered Standard Error: 1.1",
  x.label = "Distance from Boundary (meters)",
  y.label = "Turnout (percentage points)",
  y.lim = c(10, 60),
  col.dots = "skyblue"
)

# Registered Republicans
plot_NPAV_Rep <- rdplot(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "Republican"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "Republican"],
  nbin = 1000,
  title = "Estimated Effect: 4.7, Clustered Standard Error: 2.4",
  x.label = "Distance from Boundary (meters)",
  y.label = "Turnout (percentage points)",
  y.lim = c(10, 60),
  col.dots = "skyblue"
)

# Registered NPA Voters
plot_NPAV_NPA <- rdplot(
  y = Voters_NPAV_within_10km$Turnout.2020[Voters_NPAV_within_10km$PoliticalParty == "No Party Preference"]*100,
  x = Voters_NPAV_within_10km$dist_from_boundary[Voters_NPAV_within_10km$PoliticalParty == "No Party Preference"],
  nbin = 1000,
  title = "Estimated Effect: 1.9, Clustered Standard Error: 0.8",
  x.label = "Distance from Boundary (meters)",
  y.label = "Turnout (percentage points)",
  y.lim = c(10, 60),
  col.dots = "skyblue"
)

# Full Figure
Figure_NPAV_All_Parties <- plot_NPAV_Dem$rdplot / plot_NPAV_Rep$rdplot / plot_NPAV_NPA$rdplot

# Save Figure
ggsave("./Replication Package/Figure 7.pdf", Figure_NPAV_All_Parties,
       width = 8, height = 12, units = "in", dpi = 600)

#*******************************************************************************
# Additional Analysis: Geographic Boundary-Based RD Analysis for Permanent VBM Voters
#   Results Reported in Figure SI.12
#*******************************************************************************

# Prepare data for analysis
Voters_PAV_within_10km <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Perm. VBM", abs_dist <= 10000) %>%
  select(Vote_by_Mail.2020, Turnout.2020, Dist, dist_from_boundary)

# Percent Voting by Mail
result_PAV_VBM <- rdrobust(
  y = Voters_PAV_within_10km$Vote_by_Mail.2020[Voters_PAV_within_10km$Turnout.2020 == 1]*100,
  x = Voters_PAV_within_10km$dist_from_boundary[Voters_PAV_within_10km$Turnout.2020 == 1],
  cluster = Voters_PAV_within_10km$Dist[Voters_PAV_within_10km$Turnout.2020 == 1]
)

# Voter Turnout
result_PAV <- rdrobust(
  y = Voters_PAV_within_10km$Turnout.2020*100,
  x = Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

#*******************************************************************************
# Figure SI.12
#   RD Estimates (Permanent VBM Voters)
#*******************************************************************************

# Top Panel (Percent Voting by Mail)
plot_PAV_VBM <- rdplot(
  y = Voters_PAV_within_10km$Vote_by_Mail.2020[Voters_PAV_within_10km$Turnout.2020 == 1]*100,
  x = Voters_PAV_within_10km$dist_from_boundary[Voters_PAV_within_10km$Turnout.2020 == 1],
  nbin = 1000,
  title = "Estimated Effect: 4.6, Clustered Standard Error: 1.4",
  x.label = "Distance from Boundary (meters)",
  y.label = "% VBM (percentage points)",
  y.lim = c(20, 100),
  col.dots = "skyblue"
)

# Bottom Panel (Voter Turnout)
plot_PAV <- rdplot(
  y = Voters_PAV_within_10km$Turnout.2020*100,
  x = Voters_PAV_within_10km$dist_from_boundary,
  nbin = 1000,
  title = "Estimated Effect: 1.6, Clustered Standard Error: 1.3",
  x.label = "Distance from Boundary (meters)",
  y.label = "Turnout (percentage points)",
  y.lim = c(20, 60),
  col.dots = "skyblue"
)

# Full Figure
Figure_PAV_main <- plot_PAV_VBM$rdplot / plot_PAV$rdplot

# Save Figure
ggsave("./Replication Package/Figure SI.12.pdf", Figure_PAV_main,
       width = 8, height = 8, units = "in", dpi = 600)

#*******************************************************************************
# Additional Analysis: Geographic Boundary-Based RD Analysis with Alternative Maximal Distances from Boundary
#   Results Reported in Figure SI.13
#*******************************************************************************

# Prepare data for analysis
Voters_NPAV_All <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Non-Perm. VBM") %>%
  select(Vote_by_Mail.2020, Turnout.2020, Dist, dist_from_boundary, abs_dist)

# Function to calculate RD estimates for alternative maximal distances from boundary
get_estimate_NPAV_range <- function(d) {
  
  # The only UVBM areas over 13km away from the boundary are the two islands
  Voters_NPAV_range <- Voters_NPAV_All %>%
    filter(dist_from_boundary >= (-1)*d,
           dist_from_boundary <= min(d, 13000))
  
  result_NPAV_range <- rdrobust(Voters_NPAV_range$Turnout.2020,
                                Voters_NPAV_range$dist_from_boundary,
                                cluster = Voters_NPAV_range$Dist)
  
  return(result_NPAV_range$Estimate)
}

# Obtain RD estimates for alternative maximal distances from boundary
result_NPAV_range <- map(c(seq(2000, 72000, 2000)), get_estimate_NPAV_range)

# Save the results for future reference (to avoid re-running the analysis)
# save(result_NPAV_range, file = "./Replication Package/result_NPAV_range.RData")

# Collect Point Estimates, Clustered SE, and 95% CIs in a Dataframe
result_NPAV_range_df <-
  do.call(rbind, result_NPAV_range) %>%
  as.data.frame() %>%
  mutate(
    estimate = tau.bc*100,
    se = se.rb*100,
    distance = c(seq(2000, 72000, 2000))/1000,
    lower = estimate + qnorm(0.025)*se,
    upper = estimate + qnorm(0.975)*se
  )

#*******************************************************************************
# Figure SI.13
#   Sensitivity Analysis - Maximal Distance
#*******************************************************************************

# Figure SI.13
Figure_NPAV_range <- ggplot(result_NPAV_range_df, aes(x = distance, y = estimate)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower, ymax = upper)) +
  scale_x_continuous(limits = c(2, 72), breaks = seq(0, 70, 10)) +
  scale_y_continuous(limits = c(0, 7), breaks = seq(0, 6, 2)) +
  labs(title = "",
       x = "Maximal Distance from Boundary (kilometers)",
       y = "RD Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14))

# Save Figure
ggsave("./Replication Package/Figure SI.13.pdf", Figure_NPAV_range,
       width = 8, height = 4, units = "in", dpi = 600)

#*******************************************************************************
# Additional Analysis: Geographic Boundary-Based RD Analysis with Donut RD Design
#   Results Reported in Figure SI.14
#*******************************************************************************

# Among voters within 1 kilometer of the boundary, identify 1-10% of those closest to the boundary
q_abs_dist_NPAV <- quantile(Voters_NPAV_All$abs_dist[Voters_NPAV_All$abs_dist <= 1000],
                            probs = seq(0, 0.1, 0.01), na.rm = TRUE)

# Function to calculate RD estimates for donut RD design
get_estimate_NPAV_donut <- function(q) {
  
  Voters_NPAV_donut <- Voters_NPAV_All %>%
    filter(abs_dist >= q_abs_dist_NPAV[q+1],
           abs_dist <= 10000)
  
  result_NPAV_donut <- rdrobust(Voters_NPAV_donut$Turnout.2020,
                                Voters_NPAV_donut$dist_from_boundary,
                                cluster = Voters_NPAV_donut$Dist)
  
  return(result_NPAV_donut$Estimate)
}

# Obtain RD estimates for donut RD design
result_NPAV_donut <- map(0:10, get_estimate_NPAV_donut)

# Save the results for future reference (to avoid re-running the analysis)
# save(result_NPAV_donut, file = "./Replication Package/result_NPAV_donut.RData")

# Collect Point Estimates, Clustered SE, and 95% CIs in a Dataframe
result_NPAV_donut_df <-
  do.call(rbind, result_NPAV_donut) %>%
  as.data.frame() %>%
  mutate(
    estimate = tau.bc*100,
    se = se.rb*100,
    percent = seq(0, 10, 1),
    lower = estimate + qnorm(0.025)*se,
    upper = estimate + qnorm(0.975)*se
  )

#*******************************************************************************
# Figure SI.14
#   Sensitivity Analysis - Donut RD
#*******************************************************************************

# Figure SI.14
Figure_NPAV_donut <- ggplot(result_NPAV_donut_df, aes(x = percent, y = estimate)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower, ymax = upper)) +
  scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, 1)) +
  scale_y_continuous(limits = c(0, 7), breaks = seq(0, 6, 2)) +
  labs(title = "",
       x = "Percent of Observations within 1km (Closest to the Boundary) Dropped",
       y = "Donut RD Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14),
        legend.position = "bottom",
        legend.box = "vertical")

# Save Figure
ggsave("./Replication Package/Figure SI.14.pdf", Figure_NPAV_donut,
       width = 8, height = 4, units = "in", dpi = 600)

#*******************************************************************************
# Additional Analysis: Geographic Boundary-Based RD Analysis with Placebo Outcomes
#   Results Reported in Figure SI.15
#*******************************************************************************

# Prepare data for analysis
Voters_NPAV_within_10km_placebo <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Non-Perm. VBM", abs_dist <= 10000) %>%
  select(RegisteredBefore2018Gen, Turnout.2018Gen,
         RegisteredBefore2018Pri, Turnout.2018,
         RegisteredBefore2016Gen, Turnout.2016Gen,
         RegisteredBefore2016Pri, Turnout.2016,
         RegisteredBefore2014Gen, Turnout.2014Gen,
         RegisteredBefore2014Pri, Turnout.2014,
         RegisteredBefore2012Gen, Turnout.2012Gen,
         RegisteredBefore2012Pri, Turnout.2012,
         Dist, dist_from_boundary, PoliticalParty)

# Placebo RD with 2018 General Election Turnout
result_NPAV_2018Gen <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2018Gen[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Gen == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Gen == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Gen == 1]
)

# Placebo RD with 2018 Primary Election Turnout
result_NPAV_2018Pri <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2018[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Pri == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Pri == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2018Pri == 1]
)

# Placebo RD with 2016 General Election Turnout
result_NPAV_2016Gen <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2016Gen[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Gen == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Gen == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Gen == 1]
)

# Placebo RD with 2016 Primary Election Turnout
result_NPAV_2016Pri <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2016[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Pri == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Pri == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2016Pri == 1]
)

# Placebo RD with 2014 General Election Turnout
result_NPAV_2014Gen <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2014Gen[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Gen == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Gen == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Gen == 1]
)

# Placebo RD with 2014 Primary Election Turnout
result_NPAV_2014Pri <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2014[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Pri == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Pri == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2014Pri == 1]
)

# Placebo RD with 2012 General Election Turnout
result_NPAV_2012Gen <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2012Gen[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Gen == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Gen == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Gen == 1]
)

# Placebo RD with 2012 Primary Election Turnout
result_NPAV_2012Pri <- rdrobust(
  Voters_NPAV_within_10km_placebo$Turnout.2012[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Pri == 1]*100,
  Voters_NPAV_within_10km_placebo$dist_from_boundary[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Pri == 1],
  cluster = Voters_NPAV_within_10km_placebo$Dist[Voters_NPAV_within_10km_placebo$RegisteredBefore2012Pri == 1]
)

# Collect Point Estimates, Clustered SE, and 95% CIs in a Dataframe
result_NPAV_placebo <- rbind(
  result_NPAV_2018Gen$Estimate, result_NPAV_2018Pri$Estimate,
  result_NPAV_2016Gen$Estimate, result_NPAV_2016Pri$Estimate,
  result_NPAV_2014Gen$Estimate, result_NPAV_2014Pri$Estimate,
  result_NPAV_2012Gen$Estimate, result_NPAV_2012Pri$Estimate
) %>%
  as.data.frame() %>%
  mutate(
    Election = c("2018 General", "2018 Primary", "2016 General", "2016 Primary",
                 "2014 General", "2014 Primary", "2012 General", "2012 Primary"),
    Election = factor(Election,
                      levels = rev(c("2018 General", "2018 Primary",
                                     "2016 General", "2016 Primary",
                                     "2014 General", "2014 Primary",
                                     "2012 General", "2012 Primary"))),
    lower_95 = tau.bc + qnorm(0.025)*se.rb,
    upper_95 = tau.bc + qnorm(0.975)*se.rb
  )

# Save the results for future reference (to avoid re-running the analysis)
# save(result_NPAV_placebo, file = "./Replication Package/result_NPAV_placebo.RData")

#*******************************************************************************
# Figure SI.15
#   Sensitivity Analysis - Placebo RD
#*******************************************************************************

# Figure SI.15
Figure_NPAV_placebo <- result_NPAV_placebo %>%
  ggplot(aes(x = Election, y = tau.bc)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower_95, ymax = upper_95)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_y_continuous(limits = c(-3, 3), breaks = seq(-3, 3, 1)) +
  labs(title = "",
       x = "Election",
       y = "Placebo RD Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14))

# Save Figure
ggsave("./Replication Package/Figure SI.15.pdf", Figure_NPAV_placebo,
       width = 8, height = 4, units = "in", dpi = 600)
